1 Macula Densa Project

1.1 Objectives

Three main goals of this file: (1) produce cleaner-looking code; (2) identify how many clusters there are; (3) identify the top genes expressed in each of the clusters.

1.2 Problems I need to Fix

Save things as RDS files so I don't have to rerun the whole code.

2 Loading in Data sets + Library packages.

# Raise the limit on the total size of globals that `future` will export.
# 74 * 1024^3 bytes = 74 GiB = 79456894976 bytes (matches the output below).
options(future.globals.maxSize = 74 * 1024^3) # 74 GiB
getOption("future.globals.maxSize") # 79456894976
## [1] 79456894976
# Load the previously saved Seurat object from jk_code/SO5.rds (path built
# with here()).
SO5 <- LoadSeuratRds(here("jk_code", "SO5.rds"))

# Quick look at the per-cell metadata columns.
head(SO5@meta.data)

3 Analyzing the SO5 DATASET

# Embedding of SO5 with one panel per value of the `sample` metadata column.
DimPlot(object = SO5, split.by = "sample")

# Same embedding with all cells in a single panel.
DimPlot(object = SO5)

Based on this, I can see that the samples map to treatments as follows:

SO1 -> control; SO2 -> low_salt; SO3 -> low_salt; SO4 -> control

# Build the neighbor graph on dimensions 1-30, then cluster it at
# resolution 0.25 (this run produced 6 communities, see the output below).
# Use FALSE rather than F: F is an ordinary variable that can be reassigned.
SO5 <- FindNeighbors(SO5, dims = 1:30, verbose = FALSE)
SO5 <- FindClusters(SO5, resolution = 0.25)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11426
## Number of edges: 376311
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8730
## Number of communities: 6
## Elapsed time: 1 seconds
# Marker genes for every cluster, keeping only positive markers.
SO5m <- FindAllMarkers(SO5, only.pos = TRUE)
## Calculating cluster 0
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
# Markers with avg_log2FC > 1, grouped by cluster. Computed once and reused
# below instead of repeating the same pipeline twice.
SO5_strong_markers <- SO5m %>%
  group_by(cluster) %>%
  dplyr::filter(avg_log2FC > 1)
SO5_strong_markers

# First 5 rows per cluster, in the order FindAllMarkers returned them.
# NOTE: the name `top10` is kept for compatibility, but it holds 5 genes per
# cluster (n = 5), not 10.
top10 <- SO5_strong_markers %>%
  slice_head(n = 5) %>%
  ungroup()

# Heatmap of the selected markers.
# NOTE(review): the rendered output warns that some of these genes (Ifi47,
# Rpl3-ps1) are missing from the SCT scale.data slot and are dropped from
# the heatmap.
DoHeatmap(SO5, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(SO5, features = top10$gene): The following features were
## omitted as they were not found in the scale.data slot for the SCT assay: Ifi47,
## Rpl3-ps1

# Cluster overview of SO5.
DimPlot(object = SO5)

# Spot checks of individual genes, one panel per treatment.
# The "cluster" notes are placeholders: the cluster number was never filled in.
FeaturePlot(SO5, features = "S100g", split.by = "treatment") # cluster

FeaturePlot(SO5, features = "Junb", split.by = "treatment") # cluster

FeaturePlot(SO5, features = "Cxcl10", split.by = "treatment") # cluster

FeaturePlot(SO5, features = "Pappa2", split.by = "treatment")

# Cluster overview again, split by treatment.
DimPlot(object = SO5, split.by = "treatment")

4 Identifying Clusters

4.1 Cluster 5

# Cluster 5 genes, each plotted with one panel per treatment.
cluster5_genes <- c("Cxcl10", "Ifit1", "Isg15", "Gbp10", "Ifi47")
for (gene in cluster5_genes) {
  # Explicit print(): plots are not auto-printed inside a loop.
  print(FeaturePlot(SO5, features = gene, split.by = "treatment"))
}

4.2 Cluster 4

# Cluster 4 genes, each plotted with one panel per treatment.
cluster4_genes <- c("Fos", "Junb", "Egr1", "Fosb", "Zfp36")
for (gene in cluster4_genes) {
  # Explicit print(): plots are not auto-printed inside a loop.
  print(FeaturePlot(SO5, features = gene, split.by = "treatment"))
}

4.3 Cluster 2

# Cluster 2 genes, each plotted with one panel per treatment.
cluster2_genes <- c("Egf", "Krt7", "Fabp3", "Cldn19", "Tmem52b")
for (gene in cluster2_genes) {
  # Explicit print(): plots are not auto-printed inside a loop.
  print(FeaturePlot(SO5, features = gene, split.by = "treatment"))
}

4.4 Cluster 0

# Cluster 0 genes, each plotted with one panel per treatment.
cluster0_genes <- c("Mcub", "Aard", "Fetub")
for (gene in cluster0_genes) {
  # Explicit print(): plots are not auto-printed inside a loop.
  print(FeaturePlot(SO5, features = gene, split.by = "treatment"))
}

Observation: Mcub seems to be the most clearly defined gene in the low_salt condition.

4.5 Cluster 1

# Cluster 1 gene: S100g, one panel per treatment.
FeaturePlot(object = SO5, features = "S100g", split.by = "treatment")

## Cluster 3

# Cluster 3 gene: Leng9, one panel per treatment.
FeaturePlot(object = SO5, features = "Leng9", split.by = "treatment")

# Subset Cluster 0, 1, 3

# SO6: keep only the cells whose identity is cluster 0, 1 or 3 in SO5
# (original note: "checking another cluster"), then re-cluster that subset.
SO6 <- subset(SO5, idents = c("0", "1", "3"))

# Neighbor graph on dimensions 1-30, clustered at resolution 0.1.
# Use FALSE rather than F: F is an ordinary variable that can be reassigned.
SO6 <- FindNeighbors(SO6, dims = 1:30, verbose = FALSE)
SO6 <- FindClusters(SO6, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8805
## Number of edges: 290708
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9087
## Number of communities: 2
## Elapsed time: 0 seconds
# SO6 clusters, one panel per sample.
DimPlot(object = SO6, split.by = "sample")

# Positive marker genes for each SO6 cluster.
SO6m <- FindAllMarkers(object = SO6, only.pos = TRUE)
## Calculating cluster 0
## Calculating cluster 1
# Show markers with avg_log2FC > 0.5, grouped by cluster (printed only).
SO6m %>%
  group_by(cluster) %>%
  dplyr::filter(avg_log2FC > 0.5)
DimPlot(object = SO6, split.by = "sample")

# Same panels, colored by treatment instead of cluster.
DimPlot(object = SO6, split.by = "sample", group.by = "treatment")

# Per-cell metadata of the subset, then Pappa2 expression per sample.
head(SO6@meta.data)
FeaturePlot(object = SO6, features = "Pappa2", split.by = "sample")

4.6 Clusters

# Expression of selected genes across the SO6 subset (no splitting).
so6_genes <- c("S100g", "Aard", "Mcub")
for (gene in so6_genes) {
  # Explicit print(): plots are not auto-printed inside a loop.
  print(FeaturePlot(SO6, features = gene))
}

# One panel per sample, colored by sample.
DimPlot(object = SO6, group.by = "sample", split.by = "sample")

# One panel per sample, colored by treatment.
DimPlot(object = SO6, group.by = "treatment", split.by = "sample")

My guess is that these are the same. As you go from control to low_salt, the cells start to express different genes. How can I test this?

I think the next step, after I figure out something with these clusters, is to figure out what each of these top genes does: its function and purpose.